# Load the business table. The first and last character of `name` and
# `address` are stripped (presumably wrapping quote characters -- confirm
# against the raw CSV), and two neighborhoods are excluded.
business <- read_csv("./data/business.csv") %>%
  mutate(
    name = str_sub(name, 2, -2),
    address = str_sub(address, 2, -2)
  ) %>%
  filter(
    neighborhood != "Downtown Tampa",
    neighborhood != "North Valley"
  )
# Long-format category lookup: one row per (business_id, category) pair.
# `categories` is a ";"-delimited string, so it is split straight into rows.
# Splitting into a fixed set of 25 `catN` columns and gathering them back
# capped the number of categories per business and padded every business
# with NA rows; splitting into rows has neither problem.
categories <- business %>%
  select(business_id, categories) %>%
  separate_rows(categories, sep = ";") %>%
  rename(category = categories)
# One row per business that carries the exact category "Restaurants".
restaurant_ids <- distinct(
  filter(categories, category == "Restaurants"),
  business_id
)
# Keep businesses whose category string contains ";Restaurant" or ";Food".
# The second test previously used `str_match(...) == TRUE`, which compares a
# character matrix with TRUE and therefore never selected a ";Food" row;
# `str_detect()` returns the intended logical vector.
# NOTE(review): both patterns require a leading ";", so a business whose
# FIRST listed category is Restaurants/Food is not matched -- confirm whether
# that is intended.
restaurants <- business %>%
  filter(
    str_detect(categories, ";Restaurant") | str_detect(categories, ";Food")
  )

# Per-business alcohol attribute, with column names normalised by janitor.
attributes <- read_csv("./data/attributes.csv") %>%
  janitor::clean_names() %>%
  select(business_id, alcohol)

# Attach the alcohol attribute exactly once; joining a second time would
# produce duplicated `alcohol.x` / `alcohol.y` columns.
restaurants <- restaurants %>%
  left_join(attributes, by = "business_id")

# Number of restaurants per neighborhood, smallest first.
restaurants %>%
  group_by(neighborhood) %>%
  count() %>%
  arrange(n) %>%
  knitr::kable()
| neighborhood | n |
|---|---|
| Anthem | 10 |
| The Lakes | 19 |
| University | 55 |
| South Summerlin | 64 |
| Summerlin | 95 |
| Centennial | 111 |
| Sunrise | 129 |
| Northwest | 143 |
| Southwest | 152 |
| Chinatown | 224 |
| Downtown | 277 |
| Spring Valley | 347 |
| Eastside | 357 |
| Southeast | 431 |
| Westside | 449 |
| The Strip | 613 |
# Cross-tabulate restaurants by neighborhood (rows) and star rating
# (columns). Combinations with no restaurants show up as NA.
restaurants %>%
  select(business_id, neighborhood, stars) %>%
  distinct() %>%
  group_by(neighborhood, stars) %>%
  tally() %>%
  rename(my_count = n) %>%
  spread(key = stars, value = my_count) %>%
  knitr::kable()
| neighborhood | 1 | 1.5 | 2 | 2.5 | 3 | 3.5 | 4 | 4.5 | 5 |
|---|---|---|---|---|---|---|---|---|---|
| Anthem | NA | NA | 1 | NA | 2 | 3 | 2 | 2 | NA |
| Centennial | 1 | 4 | 4 | 14 | 25 | 29 | 21 | 11 | 2 |
| Chinatown | NA | 1 | 6 | 9 | 35 | 59 | 65 | 42 | 7 |
| Downtown | 1 | 4 | 9 | 15 | 40 | 56 | 89 | 45 | 18 |
| Eastside | 1 | 9 | 24 | 38 | 58 | 72 | 100 | 48 | 7 |
| Northwest | NA | 3 | 9 | 17 | 25 | 41 | 30 | 13 | 5 |
| South Summerlin | NA | NA | 1 | 4 | 11 | 24 | 23 | 1 | NA |
| Southeast | 1 | 9 | 34 | 52 | 62 | 104 | 107 | 45 | 17 |
| Southwest | 1 | 4 | 9 | 16 | 21 | 38 | 42 | 18 | 3 |
| Spring Valley | NA | 4 | 18 | 28 | 44 | 84 | 87 | 72 | 10 |
| Summerlin | 1 | 2 | 6 | 5 | 22 | 25 | 23 | 9 | 2 |
| Sunrise | 1 | 9 | 15 | 14 | 20 | 23 | 28 | 18 | 1 |
| The Lakes | NA | NA | NA | NA | 3 | 5 | 8 | 3 | NA |
| The Strip | 1 | 10 | 28 | 81 | 125 | 174 | 147 | 38 | 9 |
| University | NA | 1 | 2 | 3 | 10 | 16 | 13 | 9 | 1 |
| Westside | 2 | 9 | 14 | 38 | 60 | 111 | 129 | 65 | 21 |
# Mean review count per neighborhood. `review_count` is coerced to numeric
# before averaging.
restaurants %>%
  mutate(review_count = as.numeric(review_count)) %>%
  select(business_id, neighborhood, review_count) %>%
  group_by(neighborhood) %>%
  summarise(Average_Number_of_Reviews = mean(review_count)) %>%
  knitr::kable()
| neighborhood | Average_Number_of_Reviews |
|---|---|
| Anthem | 190.00000 |
| Centennial | 107.78378 |
| Chinatown | 190.02232 |
| Downtown | 174.64260 |
| Eastside | 134.10644 |
| Northwest | 80.65734 |
| South Summerlin | 211.89062 |
| Southeast | 116.44780 |
| Southwest | 144.33553 |
| Spring Valley | 144.63977 |
| Summerlin | 98.70526 |
| Sunrise | 36.75194 |
| The Lakes | 110.42105 |
| The Strip | 408.53507 |
| University | 89.81818 |
| Westside | 126.61025 |
# Mean and standard deviation of star rating per neighborhood, ordered from
# the lowest to the highest average.
restaurants %>%
  group_by(neighborhood) %>%
  summarize(avg = mean(stars), sd = sd(stars)) %>%
  arrange(avg) %>%
  knitr::kable()
| neighborhood | avg | sd |
|---|---|---|
| Sunrise | 3.240310 | 0.9418664 |
| Centennial | 3.333333 | 0.7929615 |
| The Strip | 3.364600 | 0.7062534 |
| Northwest | 3.405594 | 0.7781140 |
| Summerlin | 3.415789 | 0.7775187 |
| Southeast | 3.429234 | 0.8270031 |
| Southwest | 3.453947 | 0.8083899 |
| Eastside | 3.457983 | 0.8156988 |
| South Summerlin | 3.523438 | 0.4994417 |
| Anthem | 3.550000 | 0.7619420 |
| University | 3.572727 | 0.7227659 |
| Westside | 3.609131 | 0.7794122 |
| Spring Valley | 3.631124 | 0.7712010 |
| Downtown | 3.707581 | 0.7692319 |
| Chinatown | 3.712054 | 0.6668898 |
| The Lakes | 3.789474 | 0.4806185 |
# Scatter "map" of restaurants, coloured by star rating. Only points within
# 0.5 degrees of latitude and longitude of (36.1699, -115.1398) are kept.
restaurants %>%
  filter(latitude > 35.6699 & latitude < 36.6699) %>%
  filter(longitude < -114.6398 & longitude > -115.6398) %>%
  plot_ly(
    x = ~longitude, y = ~latitude,
    type = "scatter", mode = "markers",
    alpha = 0.5,
    color = ~stars,
    hoverinfo = "text",
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "stars on Yelp")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )
Center of Las Vegas: 36.1699° N, 115.1398° W. The plot displayed covers the area within 0.5 degrees of longitude and latitude of this center.
# Bar chart of review counts by star rating, with the rating treated as a
# discrete label. The rating is converted with `as.character()` rather than a
# hand-written if_else chain: the chain had no branch for 3.5, so 3.5-star
# restaurants fell through to the final default and were labelled "5".
restaurants %>%
  mutate(
    stars = as.character(stars),
    review_count = as.numeric(review_count)
  ) %>%
  group_by(stars) %>%
  plot_ly(
    x = ~stars, y = ~review_count,
    color = ~stars, type = "bar", colors = "Set3"
  ) %>%
  layout(
    xaxis = list(title = "Stars"),
    yaxis = list(title = "Number of Reviews")
  )
# Category rows, restricted to a fixed list of popular cuisines/types, for
# every business tagged "Restaurants" or "Food".
popular <- categories %>%
  filter(category %in% c("Restaurants", "Food")) %>%
  distinct(business_id) %>%
  left_join(categories, by = "business_id") %>%
  filter(
    category %in% c(
      "Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican",
      "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses",
      "Fast Food"
    )
  )
# Stacked bar chart: number of restaurants in each popular category, per
# neighborhood. The join key is left implicit (dplyr picks `business_id`).
restaurants %>%
  select(business_id, neighborhood) %>%
  inner_join(popular) %>%
  distinct() %>%
  group_by(neighborhood, category) %>%
  tally() %>%
  plotly::plot_ly(
    x = ~neighborhood,
    y = ~n,
    type = "bar",
    color = ~category,
    hoverinfo = "text",
    text = ~paste(neighborhood, " has ", n, " ", category, " restaurants.")
  ) %>%
  layout(
    yaxis = list(title = "Restaurants"),
    xaxis = list(title = "", tickangle = -45),
    barmode = "stack"
  )
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Average star rating for each popular category, best-rated first.
restaurants %>%
  inner_join(popular) %>%
  group_by(category) %>%
  summarise(avg_stars = mean(stars)) %>%
  arrange(desc(avg_stars)) %>%
  knitr::kable()
## Joining, by = "business_id"
| category | avg_stars |
|---|---|
| Sushi Bars | 3.803977 |
| Steakhouses | 3.791096 |
| Salad | 3.742958 |
| Breakfast & Brunch | 3.626866 |
| Bars | 3.600000 |
| Italian | 3.558333 |
| Mexican | 3.537471 |
| Chinese | 3.372047 |
| Pizza | 3.369863 |
| Chicken Wings | 3.174497 |
| Fast Food | 2.855289 |
# Scatter "map" of restaurants in the popular categories, coloured by
# category. The redefinition of `popular` and the repeated bar-chart and
# scatter chunks that followed were byte-identical leftovers from an
# unresolved merge and have been dropped along with the conflict marker.
restaurants %>%
  inner_join(popular) %>%
  plot_ly(
    x = ~longitude, y = ~latitude,
    type = "scatter", mode = "markers",
    alpha = 0.9,
    color = ~category,
    hoverinfo = "text",
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "star", category, "on Yelp.")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors